package org.languagetool.dev.conversion;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import org.languagetool.dev.conversion.cg.CgCompositeTag;
import org.languagetool.dev.conversion.cg.CgContextualTest;
import org.languagetool.dev.conversion.cg.CgContextualTest.POS;
import org.languagetool.dev.conversion.cg.CgGrammar;
import org.languagetool.dev.conversion.cg.CgRule;
import org.languagetool.dev.conversion.cg.CgSet;
import org.languagetool.dev.conversion.cg.CgTag;
import org.languagetool.dev.conversion.cg.CgTextualParser;

public class CgRuleConverter extends RuleConverter {

	private CgGrammar grammar;
	private String[] lines;
	private static String tagDelimiter = ":";
	
	// basic constructor
	public CgRuleConverter() {
		super();
	}
	
	public CgRuleConverter(String infile, String outfile, String specificFiletype) {
		super(infile, outfile, specificFiletype);
	}
	
	// Get methods
	public CgGrammar getGrammar() {return this.grammar;}
	
	// Set methods
	public void setGrammar(CgGrammar grammar) {this.grammar = grammar;}
	
	public void setTagDelimiter(String td) {tagDelimiter = td;}
	
	@Override
	public void parseRuleFile() throws IOException {
		parseCgFile();	// builds the grammar
		List<CgRule> ruleList = new ArrayList<CgRule>();
		for (CgRule rule : grammar.rule_by_number) {
			ruleList.add(rule);
		}
		ruleObjects	= ruleList;
		ltRules = new ArrayList<List<String>>();
		allLtRules = new ArrayList<List<String>>();
		disambiguationRules = new ArrayList<List<String>>();
		originalRuleStrings = new ArrayList<String>();
		warnings = new ArrayList<String[]>();
		for (Object ruleObject : ruleObjects) {
			CgRule cgrule = (CgRule) ruleObject;
			List<String> ruleAsList = ltRuleAsList(cgrule,generateId(ruleObject),generateName(ruleObject),cgrule.type.name());
			disambiguationRules.add(ruleAsList);
			allLtRules.add(ruleAsList);
			originalRuleStrings.add(lines[cgrule.line]);
		}
	}
	
	
	@Override
	public boolean isDisambiguationRule(Object ruleObject) {
		return true;	// all cg rules are disambiguation rules
	}
	
	public void parseCgFile() throws IOException { 
		File file = new File(inFileName);
		grammar = new CgGrammar();
		CgTextualParser parser = new CgTextualParser(grammar, file);
		int result = parser.parse_grammar_from_file(inFileName, null, null);
		if (result == 0) {
			//System.out.println("Successfully parsed constraint grammar file " + inFileName);
		} else {
			System.err.println("Failed to parse constraint grammar file " + inFileName);
		}
		getGrammarFileLines(inFileName);
	}
	
	/**
	 * Grabs the original lines of the CG grammar file. Mainly for retrieving the original rule strings to include in comments.
	 * @param filename
	 */
	public void getGrammarFileLines(String filename) {
		BufferedReader reader = null;
        StringBuilder sb = new StringBuilder();
        String inArray = "";
        // put some buffer space at the beginning, just to be sure
        sb.append("    ");
        try {
            reader = new BufferedReader(new InputStreamReader(new FileInputStream(filename), "UTF-8"));
            int c = reader.read();
            while (c != -1) {
                sb.append((char)c);
                c = reader.read();
            }
            inArray = sb.toString();
            reader.close();
        } catch (IOException e) {
            System.err.println("Error opening grammar file");
            System.exit(1);
        } 
        String[] lines = inArray.split("\n");
        this.lines = lines;;
	}
	
	/**
	 * Takes a single {@link CgRule} and converts it into a list of lines of a LT rule.
	 * Sometimes the CG rule has to be split into several LT rules. In this case, the rule 
	 * is added as a bunch of rules in a rulegroup
	 */
	@Override
	public List<String> ltRuleAsList(Object ruleObject, String id, String name, String type) {
		CgRule rule = (CgRule)ruleObject;
		type = rule.type.name();	// like K_SELECT or K_REMOVE
		List<String> ltRule = new ArrayList<String>();
		ArrayList<String> currentWarnings = new ArrayList<String>();

		String cgRuleString = lines[rule.line];
		ltRule.add("<!-- " + cgRuleString + " -->");
//		ArrayList<CgRule> rules = splitUnificationRule(mainRule, grammar);
//		for (CgRule rule : rules) {
		ArrayList<Token> tokensList = new ArrayList<Token>();
		ArrayList<ArrayList<Token>> outerList = new ArrayList<ArrayList<Token>>(); 	// in case we need to split the rule into several rules
		ArrayList<Token[]> processedLists = new ArrayList<Token[]>();
		
		CgSet targetSet = expandSetSets(grammar.getSet(rule.target));
		Token target = new Token(targetSet,false,0,false,false,new CgSet(),false,0,false);
		if (!isOrCompatible(target)) {
			System.err.println("Target for rule on line " + rule.line + " cannot be represented as one LT rule. Consider rewriting it.");
			return new ArrayList<String>();
		}
		tokensList.add(target);
		ArrayList<CgContextualTest> sortedTestsHeads = new ArrayList<CgContextualTest>();
		// puts the parent test at the end, so it's processed last
		for (CgContextualTest test : rule.test_heads) {
			if (test.isParentTest()) {
				sortedTestsHeads.add(test);
			} else {
				sortedTestsHeads.add(0, test);
			}
		}
		
		for (CgContextualTest test : sortedTestsHeads) {
			if (test.isNormalTest()) {
				Token testToken = getTokenFromNormalTest(test);
				tokensList.add(testToken);
				// only accounts for a single branching (i.e. one parent test)
			} else if (test.isParentTest()) {
				if (!outerList.isEmpty()) {
					System.err.println("Can't have two parent tests in one test on line " + rule.line + "\nTry splitting it up.");
					System.exit(1);
				}
				for (int testInt : test.ors) {
					ArrayList<Token> newTokenList = copyTokenList(tokensList);
					CgContextualTest childTest = rule.test_map.get(testInt);
					if (childTest.isNormalTest()) {
						Token childTestToken = getTokenFromNormalTest(childTest);
						newTokenList.add(childTestToken);
					} else if (childTest.isLinkedTest()) {
						ArrayList<CgContextualTest> linkedTests = new ArrayList<CgContextualTest>();
						CgContextualTest curTest = childTest;
						while (curTest.next != 0) {	// while there are still more tests to link to
							linkedTests.add(curTest);
							curTest = rule.test_map.get(curTest.next);
						} 	
						linkedTests.add(curTest); 	// add the last linked test
						
						Token headLinkedToken = getLinkedTokens(linkedTests);	// modifies the offsets for the linked tests
						newTokenList.add(headLinkedToken);
					}
					
					outerList.add(newTokenList);
				}
			} else if (test.isLinkedTest()) {
				// add all the linked tests to a list
				ArrayList<CgContextualTest> linkedTests = new ArrayList<CgContextualTest>();
				CgContextualTest curTest = test;
				while (curTest.next != 0) {	// while there are still more tests to link to
					linkedTests.add(curTest);
					curTest = rule.test_map.get(curTest.next);
				} 	
				linkedTests.add(curTest); 	// add the last linked test
				
				Token headLinkedToken = getLinkedTokens(linkedTests);	// modifies the offsets for the linked tests
				tokensList.add(headLinkedToken);
				
			}
		}
		// if the outerList is empty, we haven't had a parent test, so we can just add the tokensList to the outerList and process it
		if (outerList.isEmpty()) {	
			outerList.add(tokensList);
		}
		// pre-process/split all the tests
		// they come off outerList and go back onto processedLists
		// first split off the special case of the negative backward barrier scan
		for (int i=0;i<outerList.size();i++) {
			Token[] tokens = outerList.get(i).toArray(new Token[outerList.get(i).size()]);
			if (negativeBackwardBarrierScan(tokens)) {
				ArrayList<List<Token>> split = splitNegativeBackwardBarrierScan(tokens);
				outerList.remove(i);
				for (List<Token> splitList : split) {
					outerList.add(i, new ArrayList<Token>(splitList));
					i++;
				}
			}
		}
		
		for (int i=0;i<outerList.size();i++) {
			Token[] tokens = outerList.get(i).toArray(new Token[outerList.get(i).size()]);
			Arrays.sort(tokens);
			tokens = addGapTokens(tokens);
			if (skipSafe(tokens)) {
				tokens = addSkipTokens(tokens);
				tokens = resolveLinkedTokens(tokens);
				if (!singleRuleCompatible(tokens)) {
					ArrayList<List<Token>> singleRuleCompatibleTokens = splitForSingleRule(tokens);
					for (List<Token> srctl : singleRuleCompatibleTokens) {
						Token[] srcta = srctl.toArray(new Token[srctl.size()]);
						processedLists.add(srcta);
					}
				} else {
					processedLists.add(tokens);
				}
			}
			else {
				ArrayList<List<Token>> splitTokenLists = getSkipSafeTokens(tokens);
				for (int j=0;j<splitTokenLists.size();j++) {
					Token[] indSplitTokenList = splitTokenLists.get(j).toArray(new Token[splitTokenLists.get(j).size()]);
					indSplitTokenList = addSkipTokens(indSplitTokenList);
					indSplitTokenList = resolveLinkedTokens(indSplitTokenList);
					Arrays.sort(indSplitTokenList);
					indSplitTokenList = addGapTokens(indSplitTokenList);
					if (!singleRuleCompatible(indSplitTokenList)) {
						ArrayList<List<Token>> singleRuleCompatibleTokens = splitForSingleRule(indSplitTokenList);
						for (List<Token> srctl : singleRuleCompatibleTokens) {
							Token[] srcta = srctl.toArray(new Token[srctl.size()]);
							processedLists.add(srcta);
						}
					} else {
						processedLists.add(indSplitTokenList);
					}
				}
			}
		}
		
		// the actual rule generation
		if (processedLists.size() == 1) {
			Token[] tokens = processedLists.get(0);
			List<String> ltRule2 = getRuleByType(targetSet, tokens, rule, id, name, type);
			ltRule.addAll(ltRule2);
		} else {
			ltRule.add("<rulegroup name=\"" + generateName(ruleObject) + "\">");
			for (Token[] tokens : processedLists) {
				List<String> ltRule2 = getRuleByType(targetSet, tokens, rule, null, null, type);
				ltRule.addAll(ltRule2);
			}
			ltRule.add("</rulegroup>");
		}
		
//		}
		warnings.add(currentWarnings.toArray(new String[currentWarnings.size()]));
		return ltRule;
	}
	
	// ** METHODS THAT SPLIT A RULE INTO MULTIPLE RULES **
	
	/**
	 * For if there's multiple surface/base forms and postags in a single token that we can't "or" together
	 * in one LT token. E.g. if a token includes ("man" or "woman" or NN or NNP), it'd have to be split.
	 * @param tokens
	 * @return
	 */
	public ArrayList<List<Token>> splitForSingleRule(Token[] tokens) {
		ArrayList<List<Token>> list = new ArrayList<List<Token>>();
		ArrayList<Token> tokenList = new ArrayList<Token>(Arrays.asList(tokens));
		list.add(tokenList);
		boolean notdone = true;
		while (notdone) {
			for (int i=0;i<list.size();i++) {
				List<Token> insideList = list.get(i);
				if (singleRuleCompatible(insideList.toArray(new Token[insideList.size()]))) {
					if (i == list.size()-1) notdone = false;
					continue;
				} else {
					list.remove(i);
					ArrayList<List<Token>> splitTokens = splitListForSingleRule(insideList);
					for (List<Token> ind : splitTokens) {
						list.add(ind);
					}
					break;
				}
			}
		}
		
		return list;
	}
	
	/**
	 * Actually performs the splitting for wrapper method splitForSingleRule. Only performs a split for a single token.
	 * @param tokens
	 * @return
	 */
	public ArrayList<List<Token>> splitListForSingleRule(List<Token> tokens) {
		ArrayList<List<Token>> list = new ArrayList<List<Token>>();
		final ArrayList<Token> firstList = new ArrayList<Token>();
		list.add(firstList);
		int i=0;
		for (i=0;i<tokens.size();i++) {
			if (isOrCompatible(tokens.get(i))) {
				firstList.add(tokens.get(i));
			} else {
				list.remove(firstList);
				ArrayList<CgSet> newSets = splitCgSet(tokens.get(i).target);
				for (CgSet set : newSets) {
					Token newToken = new Token(tokens.get(i));
					newToken.target = expandSetSets(set);
					newToken.postags = newToken.target.getPostagsString();
					newToken.baseforms = newToken.target.getSingleTagBaseformsString();
					newToken.surfaceforms = newToken.target.getSingleTagSurfaceformsString();
					newToken.compositeTags = newToken.target.getCompositeTags();
					// clone the first list
					ArrayList<Token> newList = new ArrayList<Token>();
					for (Token token : firstList) {
						newList.add(new Token(token));
					}
					// add the new token
					newList.add(newToken);
					// add the new list
					list.add(newList);
				}
				break;
			}
		}
		// finish up the list
		for (int j=i+1;j<tokens.size();j++) {
			for (int k=0;k<list.size();k++) {
				ArrayList<Token> insideList = (ArrayList<Token>)list.get(k);
				insideList.add(tokens.get(j));
				list.set(k,insideList);
			}
		}
		return list;
	}
	
	/**
	 * Splits off part of a CgSet that can't be represented in a single LT token
	 * @param target
	 * @return
	 */
	public ArrayList<CgSet> splitCgSet(CgSet target) {
		// setting up the lists to perform the check
		ArrayList<CgSet> newSets = new ArrayList<CgSet>();
		CgTag[] postags = target.getSingleTagPostags();
		CgTag[] baseforms = target.getSingleTagBaseforms();
		CgTag[] surfaceforms = target.getSingleTagSurfaceforms();
		CgCompositeTag[] compositePostags = target.getCompositePostags();
		
		// actually checking and doing the splitting
		if (postags.length > 0 && baseforms.length > 0) {
			CgSet set1 = new CgSet(target);
			CgSet set2 = new CgSet(set1);
			set1.single_tags.removeAll(Arrays.asList(postags));
			set1.tags.removeAll(Arrays.asList(compositePostags));
			set2.single_tags.removeAll(Arrays.asList(baseforms));
			newSets.add(set1);
			newSets.add(set2);
			return newSets;
		}
		if (postags.length > 0 && surfaceforms.length > 0) {
			CgSet set1 = new CgSet(target);
			CgSet set2 = new CgSet(target);
			set1.single_tags.removeAll(Arrays.asList(postags));
			set1.tags.removeAll(Arrays.asList(compositePostags));
			set2.single_tags.removeAll(Arrays.asList(surfaceforms));
			newSets.add(set1);
			newSets.add(set2);
			return newSets;
		}
		if (surfaceforms.length > 0 && baseforms.length > 0) {
			CgSet set1 = new CgSet(target);
			CgSet set2 = new CgSet(target);
			set1.single_tags.removeAll(Arrays.asList(surfaceforms));
			set2.single_tags.removeAll(Arrays.asList(baseforms));
			newSets.add(set1);
			newSets.add(set2);
			return newSets;
		}
		// if we didn't catch the culprit in the single tags, it must be in the composite tags,
		// which means that there exists two composite tags that have different types of tags in them.
		// I could try to do this in a principled way, or I could just split off each composite tag. 
		// This seems like the better idea for now.
		newSets = groupCompositeTags(target);
		/*
		for (CgCompositeTag ctag : compositeTags) {
			CgSet set1 = new CgSet(target);
			CgSet set2 = new CgSet(target);
			set1.tags.remove(ctag);
			set2.tags.removeAll(Arrays.asList(compositeTags));
			set2.tags.removeAll(Arrays.asList(compositePostags));
			set2.single_tags = new HashSet<CgTag>();
			set2.tags.add(ctag);
			twoSets.add(set1);
			twoSets.add(set2);
			return twoSets;
		}
		*/
		// it should never get to here, because it should never get a set that doesn't need to be split passed to it.
		return newSets;
	}
	
	/**
	 * Groups the composite tags along lines that can be represented in a single LT rule
	 * @param target
	 * @return
	 */
	@SuppressWarnings("unchecked")
	private ArrayList<CgSet> groupCompositeTags(CgSet target) {
		HashMap<String,ArrayList<CgCompositeTag>> bf = new HashMap<String,ArrayList<CgCompositeTag>>();
		HashMap<String,ArrayList<CgCompositeTag>> sf = new HashMap<String,ArrayList<CgCompositeTag>>();
		HashMap<String,CgCompositeTag> dict = new HashMap<String,CgCompositeTag>();	// dictionary of sorts
		for (CgCompositeTag ctag : target.tags) {
			CgCompositeTag postags = new CgCompositeTag();
			CgCompositeTag baseforms = new CgCompositeTag();
			CgCompositeTag surfaceforms = new CgCompositeTag();
			for (CgTag tag : ctag.tags) {
				if (isBaseForm(tag.tag)) {
					baseforms.addTag(tag);
				} else if (isSurfaceForm(tag.tag)) {
					surfaceforms.addTag(tag);
				} else if (isPostag(tag.tag)) {
					postags.addTag(tag);
				}
			}
			if (!postags.isEmpty()) {
				if (!baseforms.isEmpty()) {
					bf = (HashMap)smartPut(bf, postags.toString(), baseforms);
				}
				else if (!surfaceforms.isEmpty()) {	// assumes there won't be both sf and bf in the same composite tag
					sf = (HashMap)smartPut(sf, postags.toString(), surfaceforms);
				}
			}
			dict.put(postags.toString(), postags);
		}
		ArrayList<CgSet> ret = new ArrayList<CgSet>();
		for (String postagSet : bf.keySet()) {
			CgSet newSet = new CgSet(target);
			newSet.tags = new HashSet<CgCompositeTag>();
			ArrayList<CgCompositeTag> bfs = bf.get(postagSet);
			for (CgCompositeTag singleBf : bfs) {
				CgCompositeTag newTotalTag = new CgCompositeTag();
				for (CgTag tag : dict.get(postagSet).tags) {
					newTotalTag.addTag(tag);
				}
				for (CgTag tag : singleBf.tags) {
					newTotalTag.addTag(tag);
				}
				newSet.addCompositeTag(newTotalTag);
			}
			ret.add(newSet);
		}
		return ret;
	}
	
	/**
	 * returns separate Token arrays, each of which is safe for dealing with scanning tokens
	 * and each of which will be a separate rule. This applies when we have a scanning token
	 * before a non-scanning token, offset wise
	 */
	public ArrayList<List<Token>> getSkipSafeTokens(Token[] tokens) {
		ArrayList<List<Token>> list = new ArrayList<List<Token>>();
		List<Token> tokenList = Arrays.asList(tokens);
		list.add(tokenList);
		boolean notdone = true;
		while (notdone) {
			for (int i=0;i<list.size();i++) {
				List<Token> insideList = list.get(i);
				if (skipSafe(insideList.toArray(new Token[insideList.size()]))) {
					if (i == list.size()-1) {
						notdone = false;
					}
				} else {
					list.remove(i);
					ArrayList<List<Token>> splitTokens = splitOutSkipTokens(insideList);
					for (List<Token> isl : splitTokens) {
						list.add(isl);
					}
					break;
				}
			}
			
		}
		return list;
	}
	
	/**
	 * Actually does the splitting for wrapper method getSkipSafeTokens
	 * @param tokens
	 * @return
	 */
	public ArrayList<List<Token>> splitOutSkipTokens(List<Token> tokens) {
		ArrayList<List<Token>> list = new ArrayList<List<Token>>();
		ArrayList<Token> scanningTokens = new ArrayList<Token>();
		ArrayList<Token> reverseScanningTokens = new ArrayList<Token>();
		ArrayList<Token> normalTokens = new ArrayList<Token>();
		for (Token token : tokens) {
			if (token.scanahead) scanningTokens.add(token);
			else if (token.scanbehind) reverseScanningTokens.add(token);
			else normalTokens.add(token);
		}
		// forward scans
		for (int s=0;s<scanningTokens.size();s++) {
			final Token scanning = scanningTokens.get(s);
			for (int n=0;n<normalTokens.size();n++) {
				final Token normal = normalTokens.get(n);
				if (normal.offset >= scanning.offset) {
					ArrayList<Token> newTokenList1 = new ArrayList<Token>();
					ArrayList<Token> newTokenList2 = new ArrayList<Token>();
					for (Token ntoken : normalTokens) {
						newTokenList1.add(ntoken);
						newTokenList2.add(ntoken);
					}
					Token newNormalToken = new Token(scanning);
					newNormalToken.scanahead = false;
					newTokenList1.add(newNormalToken);
					scanning.offset++;
					newTokenList2.add(scanning);
					list.add(newTokenList1);
					list.add(newTokenList2);
					return list;
				}
			}
		}
		// backward scans
		for (int s=0;s<reverseScanningTokens.size();s++) {
			final Token scanning = reverseScanningTokens.get(s);
			for (int n=0;n<normalTokens.size();n++) {
				final Token normal = normalTokens.get(n);
				if (normal.offset <= scanning.offset) {
					ArrayList<Token> newTokenList1 = new ArrayList<Token>();
					ArrayList<Token> newTokenList2 = new ArrayList<Token>();
					for (Token ntoken : normalTokens) {
						newTokenList1.add(ntoken);
						newTokenList2.add(ntoken);
					}
					Token newNormalToken = new Token(scanning);
					newNormalToken.scanbehind = false;
					newTokenList1.add(newNormalToken);
					scanning.offset--;
					newTokenList2.add(scanning);
					list.add(newTokenList1);
					list.add(newTokenList2);
					return list;
				}
			}
		}
		
		return null;
	}
	
	/**
	 * Handles special case of negative backwards barrier scan (e.g. (NOT -1* Verb BARRIER CLB));
	 * @param tokens
	 * @return
	 */
	public ArrayList<List<Token>> splitNegativeBackwardBarrierScan(Token[] tokens) {
		ArrayList<List<Token>> list = new ArrayList<List<Token>>();
		ArrayList<Token> newTokenList1 = new ArrayList<Token>();
		ArrayList<Token> newTokenList2 = new ArrayList<Token>();
		int index=0;
		for (index = 0;index<tokens.length;index++) {
			if (tokens[index].scanbehind && tokens[index].negate && !tokens[index].barrier.isEmpty()) {
				Token newToken = new Token(tokens[index]);
				newToken.barrier = new CgSet();
				newTokenList1.add(tokens[index]);
				newTokenList2.add(newToken);
				break;
			} else {
				newTokenList1.add(tokens[index]);
				newTokenList2.add(tokens[index]);
			}
		}
		// finish off the rest
		for (index=index+1;index<tokens.length;index++) {
			newTokenList1.add(tokens[index]);
			newTokenList2.add(tokens[index]);
		}
		list.add(newTokenList1);
		list.add(newTokenList2);
		return list;
	}
	
//	// takes a rule that has unification tags (e.g. $$NUMBER) and splits it into several easier to handle rules
//	public ArrayList<CgRule> splitUnificationRule(CgRule rule, CgGrammar grammar) {
//		ArrayList<CgRule> rules = new ArrayList<CgRule>();
//		// go over all the tests in the test_map, which should contain all tests for the entire rule, and every time you see a 
//		// unification tag, put in one of its component parts and add that modified rule to the list of new rules.
//		HashSet<CgSet> unifyingSets = new HashSet<CgSet>();
//		// add all the unifying sets in the rule
//		for (Iterator<Integer> iter = rule.test_map.keySet().iterator(); iter.hasNext(); ) {
//			CgContextualTest curTest = rule.test_map.get(iter.next());
//			CgSet target = grammar.getSet(curTest.target);
//			for (Integer setint : target.sets) {
//				CgSet set = grammar.getSet(setint);
//				if (set.type.contains(ST.ST_TAG_UNIFY.value)) {
//					unifyingSets.add(set);
//				}
//			}
//		}
//		CgSet targetSet = grammar.getSet(rule.target);
//		for (int setint : targetSet.sets) {
//			if (grammar.getSet(setint).type.contains(ST.ST_TAG_UNIFY.value)) {
//				unifyingSets.add(grammar.getSet(setint));
//			}
//		}
//		// if no unifying sets, just return the rule
//		if (unifyingSets.size() == 0) {
//			rules.add(rule);
//		}
//		for (CgSet unifyingSet : unifyingSets) {
//			CgSet unifyingSetExpanded = expandSetSets(unifyingSet);
//			for (CgTag tag : unifyingSetExpanded.single_tags) {
//				CgSet oldTargetSet = new CgSet(grammar.getSet(rule.target));
//				CgRule newRule = new CgRule(rule);
//				if (oldTargetSet.sets.contains(unifyingSet.hash)) {
//					oldTargetSet.sets.remove((Object)unifyingSet.hash);
//					oldTargetSet.single_tags.add(tag);
//					oldTargetSet.rehash();
//					grammar.addSet(oldTargetSet);
//					newRule.target = oldTargetSet.hash;
//				}
//				
//				for (Iterator<Integer> iter = newRule.test_map.keySet().iterator(); iter.hasNext();) {
//					int testKey = iter.next();
//					CgContextualTest test = newRule.test_map.get(testKey);
//					CgSet oldTestTargetSet = new CgSet(grammar.getSet(test.target));
//					if (oldTestTargetSet.sets.contains(unifyingSet.hash)) {
//						oldTestTargetSet.sets.remove(unifyingSet);
//						oldTestTargetSet.single_tags.add(tag);
//						oldTestTargetSet.rehash();
//						grammar.addSet(oldTestTargetSet);
//						test.target = oldTestTargetSet.hash;
//					}
//					newRule.test_map.put(testKey, test);
//				}
//				rules.add(newRule);
//			}
//		}
//		return rules;
//	}
	
	
	// ** METHODS THAT MODIFY A SINGLE TOKEN LIST **
	
	/**
	 * Expands the tests in a linked test to multiple tokens
	 */
	public Token[] resolveLinkedTokens(Token[] tokens) {
		ArrayList<Token> tokenList = new ArrayList<Token>(Arrays.asList(tokens));
		boolean notdone = true;
		while (notdone) {
			for (int i=0;i<tokenList.size();i++) {
				Token curToken = tokenList.get(i);
				if (curToken.nextToken != null) {
					Token tempToken = new Token(curToken.nextToken);
					tempToken.offset = curToken.offset + tempToken.relativeOffset;	// to fix the offsets
					tokenList.add(i+1, tempToken);
					Token temp2 = new Token(curToken);
					temp2.nextToken = null;
					tokenList.set(i, temp2);
					break;
				} else {
					if (i == tokenList.size()-1) notdone = false;
				}
			}
		}
		return tokenList.toArray(new Token[tokenList.size()]);
	}
	
	/**
	 * contains the list of linked CgContextualTests, to translate into Token format
	 * returns only the head of the linked tokens, have to iterate through them later when you add them
	 */
	public Token getLinkedTokens(ArrayList<CgContextualTest> tests) {
		ArrayList<Token> tokens = new ArrayList<Token>();
		for (int i=0;i<tests.size();i++) {
			if (i == 0) {
				// this kind of assumes that it won't be a parent test
				tokens.add(getTokenFromNormalTest(tests.get(i)));
			} else {
				Token token = getTokenFromNormalTest(tests.get(i));
				token.relativeOffset = token.offset;
				token.offset = token.offset + tokens.get(i-1).offset;
				token.prevToken = tokens.get(i-1);
				tokens.add(token);
			}
		}
		// sort tokens by offset and rearrange them
		Token[] ts = tokens.toArray(new Token[tokens.size()]);
		Arrays.sort(ts);
		// this assumes there's only one scan in the linked tests
		// the rationale here is that if it switches positions of tokens, and the 
		// one that's a scan token gets pushed further back in the list, its scan flag will go back to the front.
		for (int i=0;i<ts.length;i++) {
			if (ts[i].scanahead) {
				ts[i].scanahead = false;
				ts[0].scanahead = true;
				break;
			}
		}
		// check to make sure there's not another scan in the linked tests:
		for (int i=1;i<ts.length;i++) {
			if (ts[i].scanahead) {
				System.err.println("Two scan tests in one series of linked tests. This is really hard to represent in LT format. Try to split it into several rules");
				System.exit(1);
			}
		}
		ts = addLinkedGapTokens(ts);
		for (int i=0;i<ts.length;i++) {
			 if (i == 0) {
				 ts[i].relativeOffset = 0;
			 } else {
				 ts[i].relativeOffset = 1;
			 }
			 if (i != ts.length - 1) ts[i].nextToken = ts[i+1];
			 if (i != 0) ts[i].prevToken = ts[i-1];
		}
		return ts[0];
	}
	
	/**
	 * Adds gap tokens in the case of a linked (sub) list of tokens
	 * @param ts
	 * @return
	 */
	public Token[] addLinkedGapTokens(Token[] ts) {
		ArrayList<Token> tokens = new ArrayList<Token>(Arrays.asList(ts));
		boolean notdone = true;
		while (notdone) {
			for (int i=0;i<tokens.size();i++) {
				if (i==0) continue;
				else if (i > 0 && i < tokens.size()) {
					if (tokens.get(i).offset == tokens.get(i-1).offset || tokens.get(i).offset == (tokens.get(i-1).offset + 1)) {
						if (i == tokens.size() - 1) notdone = false;
						continue;
					}
					else {
						Token newToken = new Token(new CgSet(), false, tokens.get(i-1).offset + 1, false, false, new CgSet(), false, 0, false);
						newToken.relativeOffset = tokens.get(i-1).relativeOffset + 1;
						Token oldToken = tokens.get(i-1);
						oldToken.relativeOffset = -1;
						tokens.set(i-1, oldToken);
						tokens.add(i,newToken);
						break;
					}
				}
				if (i == tokens.size() - 1) {
					notdone = false;
				}
			}
		}
		return tokens.toArray(new Token[tokens.size()]);
	}
	
	/**
	 * only gets called if it's safe to add skip tokens in a straightforward manner, otherwise, we split them out
	 * if it's a negative scanning string (NOT 1* Verb BARRIER CLB), then the exception string is the target
	 * if it's a positive scanning string (1* Verb BARRIER CLB), then the exception string is the barrier and the end of sentence
	 * it always goes in the previous token with scope next
	 */
	public Token[] addSkipTokens(Token[] tokens) {
		ArrayList<Token> tokenList = new ArrayList<Token>(Arrays.asList(tokens));
		for (int i=0;i<tokenList.size();i++) {
			// forward scans (1* Verb)
			if (tokenList.get(i).scanahead) {
				if (i == 0) {
					Token newToken = new Token(new CgSet(), false, tokenList.get(i).offset - 1, false, false, new CgSet(), false, -1, false);
					if (!tokenList.get(i).barrier.isEmpty() || tokenList.get(i).negate) {
						newToken.exceptionString = getBarrierExceptionStringFromToken(tokenList.get(i));
					}
					Token oldToken = tokenList.get(i);
					// if it's a negative scan (NOT 1* Noun), then the target of the next token becomes the barrier + SENT_END
					if (oldToken.negate) {	
						CgSet newTarget = oldToken.barrier;
						CgTag sentEndTag = new CgTag();
						sentEndTag.tag = SENT_END;
						newTarget.single_tags.add(sentEndTag);
						oldToken.target = newTarget;
						oldToken.postags = oldToken.target.getPostagsString();
						oldToken.baseforms = oldToken.target.getSingleTagBaseformsString();
						oldToken.surfaceforms = oldToken.target.getSingleTagSurfaceformsString();
						oldToken.compositeTags = oldToken.target.getCompositeTags();
						tokenList.set(0, oldToken);
					}
					tokenList.add(0, newToken);
				} else {
					int index = i-1;
					String exceptionString = null;
					if (!tokenList.get(i).barrier.isEmpty() || tokenList.get(i).negate) {
						exceptionString = getBarrierExceptionStringFromToken(tokenList.get(i));
					}
					
					int prevOffset = tokenList.get(index).offset;
					while (index >= 0 && tokenList.get(index).offset == prevOffset) {
						Token prevToken = tokenList.get(index);
						prevToken.skip = -1;
						prevToken.exceptionString = exceptionString;
						tokenList.set(index, prevToken);
						index--;
					}
					Token oldToken = tokenList.get(i);
					if (oldToken.negate) {
						CgSet newTarget = oldToken.barrier;
						CgTag sentEndTag = new CgTag();
						sentEndTag.tag = SENT_END;
						newTarget.single_tags.add(sentEndTag);
						oldToken.target = newTarget;
						oldToken.postags = oldToken.target.getPostagsString();
						oldToken.baseforms = oldToken.target.getSingleTagBaseformsString();
						oldToken.surfaceforms = oldToken.target.getSingleTagSurfaceformsString();
						oldToken.compositeTags = oldToken.target.getCompositeTags();
						oldToken.negate = false;
						tokenList.set(i,oldToken);
					}
				}
			}
			// reverse scans (-1* Verb)
			else if (tokenList.get(i).scanbehind) {
				Token newToken = new Token(new CgSet(), false, tokenList.get(i).offset - 1, false, false, new CgSet(), false, -1, false);
				String exceptionString = null;
				if (!tokenList.get(i).barrier.isEmpty() || tokenList.get(i).negate) {
					exceptionString = getBarrierExceptionStringFromToken(tokenList.get(i));
				}
				CgSet newTarget = newToken.target;
				CgTag sentStartTag = new CgTag();
				sentStartTag.tag = SENT_START;
				newTarget.single_tags.add(sentStartTag);
				newToken.target = newTarget;
				newToken.postags = newToken.target.getPostagsString();
				newToken.baseforms = newToken.target.getSingleTagBaseformsString();
				newToken.surfaceforms = newToken.target.getSingleTagSurfaceformsString();
				newToken.compositeTags = newToken.target.getCompositeTags();
				newToken.skip = -1;
				Token oldToken = tokenList.get(i);
				oldToken.skip = -1;
				oldToken.scanbehind = false;
				// if there's no barrier
				if (oldToken.barrier.isEmpty()) {
					if (oldToken.negate) {
						newToken.exceptionString = exceptionString;
						newToken.offset++;
						tokenList.set(i, newToken);
					} else {
						tokenList.set(i, oldToken);
						tokenList.add(i, newToken);
						i++;
					}
				}
				// if there IS a barrier
				else {
					if (oldToken.negate) {
						oldToken.target = oldToken.barrier;
						oldToken.postags = oldToken.target.getPostagsString();
						oldToken.baseforms = oldToken.target.getSingleTagBaseformsString();
						oldToken.surfaceforms = oldToken.target.getSingleTagSurfaceformsString();
						oldToken.compositeTags = oldToken.target.getCompositeTags();
						oldToken.exceptionString = exceptionString;
						tokenList.set(i,oldToken);
						tokenList.add(i,newToken);
					} else {
						oldToken.exceptionString = exceptionString;
						tokenList.set(i, oldToken);
						tokenList.add(i, newToken);
						i++;
					}
				}
			}
		}
		return tokenList.toArray(new Token[tokenList.size()]);
	}
	
	/**
	 * For cases where there needs to be an empty token inserted in order to make a proper LT pattern
	 * e.g. REMOVE N IF (1 Verb) (3 Det);
	 * @param tokens
	 * @return
	 */
	public Token[] addGapTokens(Token[] tokens) {
		boolean notdone = true;
		ArrayList<Token> tokenList = new ArrayList<Token>(Arrays.asList(tokens));
		while (notdone) {
			for (int i=0;i<tokenList.size();i++) {
				if (i == 0) continue;
				else if (i > 0 && i < tokenList.size()){
					if (tokenList.get(i).offset == tokenList.get(i-1).offset) {
						if (i == tokenList.size()-1) notdone = false;
						continue;
					}
					if (tokenList.get(i).offset != (tokenList.get(i-1).offset + 1) && tokenList.get(i).prevToken == null) {
						tokenList.add(i, new Token(new CgSet(), false, tokenList.get(i-1).offset + 1, false, false, new CgSet(), false, 0, false));
						break;
					}
				}
				if (i == tokenList.size() - 1) {
					notdone = false;
				}
			}
		}
		return tokenList.toArray(new Token[tokenList.size()]);
	}
	
	public ArrayList<Token> copyTokenList(ArrayList<Token> tokens) {
		ArrayList<Token> newList = new ArrayList<Token>();
		for (Token token : tokens) {
			newList.add(new Token(token));
		}
		return newList;
	}
	
	public ArrayList<Token> removeExtraEmptyTokens(ArrayList<Token> tokens) {
		if (tokens.size() == 1) {
			return tokens;
		} else {
			ArrayList<Token> newTokenList = new ArrayList<Token>();
			for (Token token : tokens) {
				if (!token.isEmpty()) {
					newTokenList.add(token);
				}
			}
			if (newTokenList.isEmpty()) {
				newTokenList.add(tokens.get(0));
			}
			return newTokenList;
		}
	}
	
	
	// ** METHODS THAT CHECK THE PROPERTIES OF A RULE **
	
	/**
	 * returns if the rule is safe to add the "skip" attribute to the previous token, as we do normally;
	 * otherwise, splits the tokens and returns multiple rules.
	 */
	public boolean skipSafe(Token[] tokens) {
		HashSet<Token> scanningTokens = new HashSet<Token>();
		HashSet<Token> reverseScanningTokens = new HashSet<Token>();
		HashSet<Token> normalTokens = new HashSet<Token>();
		for (Token token : tokens) {
			if (token.scanahead) {
				scanningTokens.add(token);
			} else if (token.scanbehind) {
				reverseScanningTokens.add(token);
			} else {
				normalTokens.add(token);
			}
		}
		for (Token s : scanningTokens) {
			for (Token o : normalTokens) {
				if (s.offset <= o.offset) return false;
			}
		}
		for (Token s : reverseScanningTokens) {
			for (Token o : normalTokens) {
				if (s.offset >= o.offset) return false;
			}
		}
		return true;
	}
	
	/**
	 * returns true of the rule contains an example of a negative backwards barrier scan (NOT -1* Adj BARRIER Noun), e.g. 
	 * these rules have to be treated as special cases
	 */
	public boolean negativeBackwardBarrierScan(Token[] tokens) {
		for (Token token : tokens) {
			if (token.scanbehind && token.negate && !token.barrier.isEmpty()) {
				return true;
			}
		}
		return false;
	}
	
	/**
	 * 
	 * @param tokens
	 * @return
	 */
	public boolean singleRuleCompatible(Token[] tokens) {
		for (Token token : tokens) {
			if (!isOrCompatible(token)) return false;
		}
		return true;
	}
	
	/**
	 * Returns true if the {@link Token} can be represented in a single LT rule, false otherwise.
	 * Example: "are"|noun cannot be represented as one LT token
	 * @param token
	 * @return
	 */
	public boolean isOrCompatible(Token token) {
		if (token.postags.length > 0 && (token.baseforms.length > 0 || token.surfaceforms.length > 0)) {
			return false;
		}
		if (token.baseforms.length > 0 && token.surfaceforms.length > 0) {
			return false;
		}
		if (token.compositeTags.length > 0 && (token.postags.length > 0 || token.baseforms.length > 0 || token.surfaceforms.length > 0)) {
			return false;
		}
		HashSet<String> pos = new HashSet<String>();
		HashSet<String> base = new HashSet<String>();
		HashSet<String> surf = new HashSet<String>();
		for (CgCompositeTag ctag : token.compositeTags) {
			CgCompositeTag postagCompile = new CgCompositeTag();
			for (CgTag tag : ctag.tags) {
				if (isPostag(tag.tag)) {
					postagCompile.addTag(tag);
				} else if (isSurfaceForm(tag.tag)) {
					surf.add(tag.tag);
				} else if (isBaseForm(tag.tag)) {
					base.add(tag.tag);
				}
			}
			pos.add(postagCompile.toString());
		}
		if (pos.size() > 1 && (surf.size() > 1 || base.size() > 1)) {
			return false;
		}
		if (surf.size() > 1 && base.size() > 1) {
			return false;
		}
		if (surf.size() > 0 && base.size() > 0 && pos.size() > 0) {
			return false;
		}
		return true;
	}

	
	
	
	// ** METHODS THAT ACTUALLY GENERATE THE LT RULES **
	
	/**
	 * Actual LT rule generation
	 */
	@SuppressWarnings("unchecked")
	public List<String> getRuleByType(CgSet target, Token[] tokens, CgRule rule, String id, String name, String type) {
		ArrayList<String> ltRule = new ArrayList<String>();
		TreeMap<Integer,ArrayList<Token>> tokenmap = new TreeMap<Integer,ArrayList<Token>>();
		for (Token token : tokens) {
			tokenmap = (TreeMap)smartPut(tokenmap,token.offset,token);
		}
		
		if (name != null || id != null) {
			ltRule.add("<rule id=\"" + id + "\" name=\"" + name + "\">");
		} else {
			ltRule.add("<rule>");
		}
		
		int mark = getPositionOfTarget(tokens);
		ltRule.add(firstIndent + "<pattern mark=\"" + mark + "\">");
		for (Iterator<Integer> iter = tokenmap.keySet().iterator(); iter.hasNext();) {
			int key = iter.next();
			ArrayList<Token> value = tokenmap.get(key);
			// remove duplicates, so we don't have unnecessary "and"s floating around
			value = removeExtraEmptyTokens(value);
			if (value.size() == 1) {
				Token token = value.get(0);
				ltRule = addCgToken(ltRule,token,secondIndentInt);
			}
			// if the number of tokens at the given offset is more than 1, we have to and them together
			else {
				ltRule.add(secondIndent + "<and>");
				
				for (Token token : value) {
					ltRule = addCgToken(ltRule,token,thirdIndentInt);
				}
				ltRule.add(secondIndent + "</and>");
			}
			
		}
		ltRule.add(firstIndent + "</pattern>");
		// REMOVE
		if (type.equals("K_REMOVE")) {
			ltRule.add(firstIndent + "<disambig action=\"remove\">" + removeTarget(target) + "</disambig>");
		}
		// SELECT
		else if (type.equals("K_SELECT")) {
			ltRule.add(firstIndent + filterTarget(target, mark + 1));
		} 
		// MAP
		else if (type.equals("K_MAP")) {
			ltRule.add(firstIndent + "<disambig action=\"add\" postag=\"" + addRegexp(rule.maplist) + "\" postag_regexp=\"yes\"/>");
		}
		ltRule.add("</rule>");
		return ltRule;
	}
	
	/**
	 * Helper for getRuleByType
	 * @param ltRule
	 * @param token
	 * @param indent
	 * @return
	 */
	public ArrayList<String> addCgToken(ArrayList<String> ltRule, Token token, int indent) {
		String postags = postagsToString(token.postags);
		String baseforms = glueWords(cleanForms(token.baseforms));
		String surfaceforms = glueWords(cleanForms(token.surfaceforms));
		CgCompositeTag[] compositeTags = token.compositeTags;
		// should never have both composite tags and any of the postags/baseforms/surfaceforms
		// also, if there're composite tags, they should have the same postags component, and not both base and surface forms
		if (compositeTags.length != 0) {
			ArrayList<String> baseformsList = new ArrayList<String>();
			ArrayList<String> surfaceformsList = new ArrayList<String>();
			for (CgCompositeTag ctag : compositeTags) {
				CgCompositeTag postagCompiled = new CgCompositeTag();
				for (CgTag tag : ctag.tags) {
					if (isPostag(tag.tag)) {
						postagCompiled.addTag(tag);
					} else if (isBaseForm(tag.tag)) {
						baseformsList.add(tag.tag);
					} else if (isSurfaceForm(tag.tag)) {
						surfaceformsList.add(tag.tag);
					}
				}
				postags = compositePostagToString(postagCompiled);
			}
			baseforms = glueWords(cleanForms(baseformsList.toArray(new String[baseformsList.size()])));
			surfaceforms = glueWords(cleanForms(surfaceformsList.toArray(new String[surfaceformsList.size()])));
		}
		
		boolean careful = token.careful;
		boolean negate = token.negate;
		String exceptions = token.exceptionString;
		int skip = token.skip;
		
		// the special case of the generic token:
		if (postags.equals("") && baseforms.equals("") && surfaceforms.equals("")) {
			ltRule = addToken(ltRule,baseforms,postags,exceptions,careful,false,negate,skip,indent);
			return ltRule;
		}
		
		if (!baseforms.equals("")) {
			ltRule = addToken(ltRule, baseforms, postags, exceptions, careful, true, negate, skip, indent);
		} else if (!surfaceforms.equals("")) {
			ltRule = addToken(ltRule, surfaceforms, postags, exceptions, careful, false, negate, skip, indent);
		} else {
			ltRule = addToken(ltRule, surfaceforms, postags, exceptions, careful, false, negate, skip, indent);
		}
		return ltRule;
	}
	
	/**
	 * Helper for getRuleByType
	 * @param token
	 * @return
	 */
	public String getBarrierExceptionStringFromToken(Token token) {
		boolean not = token.negate;
		boolean inflected = true;
		String barrierPos = glueWords(expandSetSets(token.barrier).getPostagsString());
		
		if (token.scanahead) {
			barrierPos = barrierPos.concat("|" + SENT_END);
		}
		String barrierToken = glueWords(token.barrier.getSingleTagBaseformsString());
		if (barrierToken.isEmpty()) {
			barrierToken = glueWords(token.barrier.getSingleTagSurfaceformsString());
			inflected = false;
		}
		String targetPos = glueWords(token.postags);
		String targetToken = glueWords(token.baseforms);
		if (targetToken.isEmpty()) {
			targetToken = glueWords(token.surfaceforms);
			if (not) inflected = false;
		}
		String postagString = "";
		String tokenString = "";
		String inflectedString = "";
		String regexpString = "";
		String postagRegexpString = "";
		if (not) {
			if (!targetPos.isEmpty()) {
				postagString = " postag=\"" + targetPos + "\"";
				if (isRegex(targetPos)) {
					postagRegexpString = " postag_regexp=\"yes\"";
				}
			}
			if (!targetToken.isEmpty()) {
				tokenString = " ".concat(targetToken);
				if (isRegex(tokenString)) {
					regexpString = " regexp=\"yes\"";
				}
			}
		} else {
			if (!barrierPos.isEmpty()) {
				postagString = " postag=\"" + barrierPos + "\"";
			}
			if (isRegex(barrierPos)) {
				postagRegexpString = " postag_regexp=\"yes\"";
			}
			if (!barrierToken.isEmpty()) {
				tokenString = barrierToken;
				if (isRegex(tokenString)) {
					regexpString = " postag_regexp=\"yes\"";
				}
			}
		}
		if (inflected) {
			inflectedString = " inflected=\"yes\"";
		}
		String retString = "<exception" + postagString + inflectedString + regexpString + postagRegexpString + " scope=\"next\">" + tokenString + "</exception>";
		return retString;
	}
	
	/** 
	 * Returns the position of the target token, which is always relative to the furthest back token in the rules
	 * @param tokens
	 * @return
	 */
	public int getPositionOfTarget(Token[] tokens) {
		Token firstToken = tokens[0];
		return -1 * firstToken.offset;
	}
	
	/**
	 * Helper that takes a normal contextual test (i.e. not a Parent or a Linked test, e.g. (1 Noun))
	 * and returns the properly filled-out Token object
	 * @param test
	 * @return
	 */
	public Token getTokenFromNormalTest(CgContextualTest test) {
		CgSet testTarget = expandSetSets(grammar.getSet(test.target));
		boolean testCareful = test.pos.contains(POS.POS_CAREFUL.value);
		int testOffset = test.offset;
		boolean testScanAhead = test.pos.contains(POS.POS_SCANFIRST.value) && testOffset >= 0;
		boolean testScanBehind = test.pos.contains(POS.POS_SCANFIRST.value) && testOffset < 0;
		boolean testNot = test.pos.contains(POS.POS_NOT.value);
		CgSet testBarrier = grammar.getSet(test.barrier);
		CgSet testCBarrier = grammar.getSet(test.cbarrier);
		CgSet barrier = null;
		boolean cbarrier = false;
		if (testBarrier != null && testCBarrier != null) {
			System.err.println("Can't have both a barrier and a careful barrier");
			System.exit(1);
		}
		if (testBarrier != null) {
			barrier = testBarrier;
			cbarrier = false;
		} else if (testCBarrier != null) {
			barrier = testCBarrier;
			cbarrier = true;
		} else {
			barrier = new CgSet();
			cbarrier = false;
		}
		if (test.line == 548 && test.offset == 1) {
			System.out.println();
		}
		return new Token(testTarget,testCareful,testOffset,testScanAhead,testScanBehind,barrier,cbarrier,0,testNot);
		
	}
	
	/**
	 * takes a CgSet and, if it contains nested sets, expands them according to the proper
	 * set operators (set_ops) and returns the new set.
	 */
	public CgSet expandSetSets(CgSet set) {
		CgSet newSet = new CgSet();
		newSet.line = set.line;
		newSet.type = set.type;
		newSet.name	= set.name;
		if (set.sets.isEmpty()) {
			return set;
		}
		else if (set.sets.size() > 1 && set.set_ops.isEmpty()) {
			System.err.println("Error: something wonky with the set on line " + set.line);
			System.exit(1);
		}
		else if (set.set_ops.isEmpty()) {
			CgSet expandedSet = expandSetSets(grammar.getSet(set.sets.get(0)));
			for (CgCompositeTag ctag : expandedSet.tags) {
				newSet.tags.add(ctag);
			}
			for (CgTag tag : expandedSet.single_tags) {
				newSet.single_tags.add(tag);
			}
		}
		else {
			for (int op=0;op<set.set_ops.size();op++) {
				CgSet expandedSet1 = expandSetSets(grammar.getSet(set.sets.get(op)));
				CgSet expandedSet2 = expandSetSets(grammar.getSet(set.sets.get(op+1)));
				// Cartesian set product (+)
				if (set.set_ops.get(op) == 4) {
					for (CgTag tag : expandedSet1.single_tags) {
						for (CgTag tag2 : expandedSet2.single_tags) {
							if (tag.tag.equals(tag2.tag)) {
								newSet.addTag(tag);
							} else {
								CgCompositeTag ctag = new CgCompositeTag();
								ctag.addTag(tag);
								ctag.addTag(tag2);
								newSet.addCompositeTag(ctag);
							}
						}
					}
					for (CgCompositeTag ctag : expandedSet1.tags) {
						for (CgTag tag : expandedSet2.single_tags) {
							if (ctag.tags.contains(tag)) {
								newSet.addCompositeTag(ctag);
							} else {
								CgCompositeTag ctagnew = new CgCompositeTag();
								for (CgTag tag2 : ctag.tags) {
									ctagnew.addTag(tag2);
								}
								ctagnew.addTag(tag);
								newSet.addCompositeTag(ctagnew);
							}
						}
					}
					for (CgCompositeTag ctag : expandedSet2.tags) {
						for (CgTag tag : expandedSet1.single_tags) {
							if (ctag.tags.contains(tag)) {
								newSet.addCompositeTag(ctag);
							} else {
								CgCompositeTag ctagnew = new CgCompositeTag();
								for (CgTag tag2 : ctag.tags) {
									ctagnew.addTag(tag2);
								}
								ctagnew.addTag(tag);
								newSet.addCompositeTag(ctagnew);
							}
						}
					}
					for (CgCompositeTag ctag : expandedSet1.tags) {
						for (CgCompositeTag ctag2 : expandedSet2.tags) {
							CgCompositeTag ctagnew = new CgCompositeTag();
							for (CgTag tag : ctag.tags) {
								if (!ctagnew.tags.contains(tag)) {
									ctagnew.addTag(tag);
								}
							}
							for (CgTag tag : ctag2.tags) {
								if (!ctagnew.tags.contains(tag)) {
									ctagnew.addTag(tag);
								}
							}
							newSet.addCompositeTag(ctagnew);
						}
					}
				}
				// OR or | 
				else if (set.set_ops.get(op) == 3) {
					for (CgCompositeTag ctag : expandedSet1.tags) {
						newSet.addCompositeTag(ctag);
					}
					for (CgCompositeTag ctag : expandedSet2.tags) {
						newSet.addCompositeTag(ctag);
					}
					for (CgTag tag : expandedSet1.single_tags) {
						newSet.addTag(tag);
					}
					for (CgTag tag : expandedSet2.single_tags) {
						newSet.addTag(tag);
					}
				}
			}
		}
		return newSet;
	}
	

	
	
	
	
	
	
	
	
	@Override
	public String generateName(Object ruleObject) {
		CgRule rule = (CgRule) ruleObject;
		String name = rule.name;
		if (name == null) {
			name = "rule_" + nameIndex;
			nameIndex++;
		}
		return name;
	}
	
	@Override
	public String generateId(Object ruleObject) {
		CgRule rule = (CgRule) ruleObject;
		String name = rule.name;
		if (name == null) {
			name = "rule_" + idIndex;
			idIndex++;
		}
		return name;
	}
	
	@Override
	public String getOriginalRuleString(Object ruleObject) {
		CgRule rule = (CgRule) ruleObject;
		return lines[rule.line];
	}
	
	@Override
	public String[] getAcceptableFileTypes() {
		String[] ft = {"default"};
		return ft;
	}
	
	
	/**
	 * Token class: contains the elements that will be written into <token> elements in the resulting LT rule
	 * @author mbryant
	 *
	 */
	public class Token implements Comparable<Token> {

		// target consists of the expanded CgSet, with no nested sets.
		public CgSet target;
		public String[] postags;
		public String[] surfaceforms;
		public String[] baseforms;
		public CgCompositeTag[] compositeTags;
		public boolean careful;
		public int offset;
		public boolean scanahead;
		public boolean scanbehind;
		public CgSet barrier;
		public boolean cbarrier;
		public int skip;
		public boolean negate;
		public int relativeOffset;
		public Token nextToken;
		public Token prevToken;
		public String exceptionString;
		
		public Token() {
			// nothing
		}
		
		// copy constructor
		public Token(Token another) {
			this.target = new CgSet(another.target);
			this.postags = target.getPostagsString();
			this.surfaceforms = target.getSingleTagSurfaceformsString();
			this.baseforms = target.getSingleTagBaseformsString();
			this.compositeTags = target.getCompositeTags();
			this.careful = another.careful;
			this.offset = another.offset;
			this.scanahead = another.scanahead;
			this.scanbehind = another.scanbehind;
			this.barrier = new CgSet(another.barrier);
			this.cbarrier = another.cbarrier;
			this.skip = another.skip;
			this.negate = another.negate;
			this.nextToken = another.nextToken;
			this.prevToken = another.prevToken;
			this.relativeOffset = another.relativeOffset;
			this.exceptionString = another.exceptionString;
		}
		
		public Token(CgSet target,
					 boolean careful, int offset, boolean scanahead, boolean scanbehind,
					 CgSet barrier, boolean cbarrier, int skip, boolean negate) {
			this.target = target;
			this.postags = target.getPostagsString();
			this.surfaceforms = target.getSingleTagSurfaceformsString();
			this.baseforms = target.getSingleTagBaseformsString();
			this.compositeTags = target.getCompositeTags();
			this.careful = careful;
			this.offset = offset;
			this.scanahead = scanahead;
			this.scanbehind = scanbehind;
			this.barrier = barrier;
			this.cbarrier = cbarrier;
			this.skip = skip;
			this.negate = negate;
			this.nextToken = null;
			this.prevToken = null;
			this.relativeOffset = 0;
		}
		
		public int compareTo(Token token) {
			if (this.offset < token.offset) {
				return -1;
			} else if (this.offset == token.offset) {
				return 0;
			} else {
				return 1;
			}
		}
		
		public boolean isEmpty() {
			return (this.postags.length == 0) &&
				   (this.baseforms.length == 0) &&
				   (this.surfaceforms.length == 0) &&
				   (this.compositeTags.length == 0);
		}
		
		@Override
		public String toString() {
			StringBuilder sb = new StringBuilder();
			for (String postag : this.postags) {
				sb.append(postag + " ");
			}
			for (String baseform : this.baseforms) {
				sb.append(baseform + " ");
			}
			for (String surfaceform : this.surfaceforms) {
				sb.append(surfaceform + " ");
			}
			for (CgCompositeTag ctag : this.compositeTags) {
				sb.append(ctag.toString() + " ");
			}
			return sb.toString();
		}
	}
	
	
	// ** SOME STATIC STRING-CHECKING METHODS **
	
	public static boolean isPostag(String tag) {
		return !(tag.matches("\"\\<.*\\>\"r?i?") || tag.matches("\".*\"r?i?"));
	}
	
	public static boolean isSurfaceForm(String form) {
		return (form.matches("\"\\<.*\\>\"r?i?") || form.matches("\"\"\\<.*\\>\"r?i?\""));
	}
	
	public static boolean isBaseForm(String form) {
		return (form.matches("\"[^<]*[^>]\"r?i?") || form.matches("\"\"[^<]*[^>]\"r?i?\""));
	}
	
	public static boolean isCompositePostag(CgCompositeTag ctag) {
		for (CgTag tag : ctag.tags) {
			if (isBaseForm(tag.tag) || isSurfaceForm(tag.tag)) {
				return false;
			}
		}
		return true;
	}
	
	public static boolean sameStrings(String[] s1, String[] s2) {
		if (s1.length != s2.length) {
			return false;
		} else {
			for (String ss1 : s1) {
				for (String ss2 : s2) {
					if (!ss1.equals(ss2)) return false;
				}
			}
		}
		return true;
	}
	
	
	
	// ** METHODS THAT RETURN WRITABLE FORMS FOR CONSTRAINT GRAMMAR TAGS/SETS **
	
	/** 
	 * Removes the quotation marks, angle brackets, and suffixes from surface/base forms
	 * @param words
	 * @return
	 */
	public static String[] cleanForms(String[] words) {
		for (int i=0;i<words.length;i++) {
			words[i] = words[i].replaceAll(">\"r?i?", "").replaceAll(">\"", "").replaceAll("\"<","").replaceAll("\"r?i?", "");
		}
		return words;
	}
	
	// this is problematic, in that I don't know how to write these regular expressions when we have
	// different morphological tag syntax, as Polish or French.
	public static String filterRegexp(CgSet target) {
		String[] postags = target.getPostagsString();
		String postagString = glueWords(postags);
		postagString = "(".concat(postagString).concat(")");
		String postagRegexp = toStringRegexpFormat(postagString);
		
		StringBuilder sb = new StringBuilder();
		sb.append("(?!");
		sb.append(postagRegexp);
		sb.append(").*");
		return sb.toString();
	}
	
	public static String filterTarget(CgSet target, int targetNo) {
		// <match no=\"" + (mark + 1) + "\" postag=\"" + replaceRegexp(target) + "\" postag_regexp=\"yes\"/>
		StringBuilder sb = new StringBuilder();
		
		String[] lemmas = cleanForms(target.getSingleTagBaseformsString());
		String[] postags = cleanForms(target.getPostagsString());
		CgCompositeTag[] compositeTags = target.getCompositeTags();
		String[] surfaceforms = cleanForms(target.getSingleTagSurfaceformsString());
		
		if (lemmas.length > 0 && (compositeTags.length > 0 || postags.length > 0 || surfaceforms.length > 0)) {
			System.err.println("Error: something went wrong here.");
		}
		
		// assumes there can't be both lemmas and postags
		if (lemmas.length > 0) {
			sb.append("<disambig action=\"filter\"><match no=\"" + targetNo + "\">" + glueWords(lemmas) + "</match></disambig>");
		}
		if (postags.length > 0) {
			String postagRegexp = "";
			if (isRegex(glueWords(postags))) {
				postagRegexp = " postag_regexp=\"yes\"";
			}
			sb.append("<disambig postag=\"" + glueWords(postags) + "\"" + postagRegexp + "/>");
		}
		
		return sb.toString();
		
	}
	
	// formats the target for use with disambiguation action="remove" keyword
	// assuming they support regular expressions, which they currently don't but kind of have to in order to work
	public static String removeTarget(CgSet target) {
		StringBuilder sb = new StringBuilder();
		sb.append("<wd ");
		// these should always be the correct lengths, because if they weren't, they should have been split earlier.
		String[] lemmas = target.getSingleTagBaseformsString();
		String[] postags = target.getPostagsString();
		CgCompositeTag[] compositeTags = target.getCompositeTags();
		String[] surfaceforms = target.getSingleTagSurfaceformsString();
		
		if (lemmas.length > 0 && (compositeTags.length > 0 || postags.length > 0 || surfaceforms.length > 0)) {
			System.err.println("Error: something went wrong here.");
		}
		
		if (lemmas.length > 0) {
			sb.append("lemma=" + glueWords(lemmas));
		}
		if (postags.length > 0) {
			sb.append("pos=\"" + glueWords(postags) + "\"");
		}
		
		sb.append("/>");
		return sb.toString();
	}
	
	public static String replaceRegexp(CgSet target) {
		String[] postags = target.getPostagsString();
		String postagString = glueWords(postags);
		postagString = "(".concat(postagString).concat(")");
		String postagRegexp = toStringRegexpFormat(postagString);
		return postagRegexp;
		
	}
	
	public static String addRegexp(CgSet target) {
		String[] postags = target.getSingleTagPostagsString();
		if (postags.length != 1) {
			System.err.println("Error: trying to map more than one mapping tag on line " + target.line);
			System.exit(1);
		}
		String postag = postags[0];
		return postag;
	}
	
	// ** LANGUAGE-DEPENDENT METHODS **
	
	public static String postagsToString(String[] postags) {
		
		if (postags.length == 0) {
			return "";
		}
		/*
		StringBuilder sb = new StringBuilder();
		sb.append("(.*" + tagDelimiter + ")?");
		String postagsGlued = glueWords(postags);
		sb.append(postagsGlued);
		sb.append("(" + tagDelimiter + ".*)?");
		*/
		// The simplest possible way to do this
		StringBuilder sb = new StringBuilder();
		for (String pos : postags) {
			sb.append(pos + "|");
		}
		String ret = sb.toString();
		return ret.substring(0,ret.length() - 1);
	}
	
	public static String toStringRegexpFormat(String t) {
		/*
		StringBuilder sb = new StringBuilder();
		sb.append("^(.*" + tagDelimiter + ")?");
		sb.append(t);
		sb.append("(" + tagDelimiter + ".*)?$");
		return sb.toString();
		*/
		return t;
		
	}
	
	//TODO: only a stand-in for now; depends on the language-specific multiple-tag string representation
	// some complicated regex stuff going on here.
	// only should be applied to composite postags. Composite tags with postags + s/b-forms get split in different ways
	public static String compositePostagToString(CgCompositeTag ctag) {
		/*
		StringBuilder sb = new StringBuilder();
		String gluedPostag = "";
		int noComponents = 0;
		for (CgTag tag : ctag.tags) {
			if (isPostag(tag.tag)) {
				gluedPostag = gluedPostag.concat(tag.tag).concat("|");
				noComponents++;
			}
		}
		gluedPostag = "(".concat(gluedPostag.substring(0, gluedPostag.length() - 1)).concat(")");
		sb.append("^(.*" + tagDelimiter + ")?");
		for (int i=0;i<noComponents;i++) {
			sb.append(gluedPostag);
			if (i < noComponents - 1) {
				sb.append("(" + tagDelimiter + ".*" + tagDelimiter + "?)");
			}			
		}
		sb.append("(" + tagDelimiter + ".*)?$");
		*/
		// The simplest possible way to represent this
		StringBuilder sb = new StringBuilder();
		for (CgTag tag : ctag.tags) {
			if (isPostag(tag.tag)) {
				sb.append(tag.tag);
				sb.append(tagDelimiter);
			}
		}
		sb.deleteCharAt(sb.length() - 1);
		sb.append(".*");
		return sb.toString();
	}
	
	// language-specific way of representing tags. Relies on a tagDelimiter, which appears to be different in different languages.
	// For example, in French, the tags look like: "N f s" (i.e. Noun feminine singular). But in Polish they look like "N:f:s" (same).
	// so this syntax takes care of both of those cases (as long as you change the tagDelimiter), wherever in the postag string 
	// the tag appears.
	public static String tagToString(CgTag tag) {
		/*
		if (tag.tag.equals(sent_end)) {
			return sent_end;
		}
		if (tag.tag.equals(sent_start)) {
			return sent_start;
		}
		StringBuilder sb = new StringBuilder();
		String t = tag.tag;
		sb.append("^" + t + "$|");											// the tag is the only tag
		sb.append("^" + t + tagDelimiter + ".*|");							// the tag is the first tag
		sb.append(".*" + tagDelimiter + t + tagDelimiter + ".*|");			// the tag is in the middle somewhere
		sb.append(".*" + tagDelimiter + t + "$");							// the tag is the last tag
		
		return sb.toString();
		*/
		// The simplest possible way to do this
		return tag.tag;
	}
	
	/**
	 * Helper to properly put an item to a map where the values are lists
	 * @param <K>
	 * @param <V>
	 * @param map
	 * @param key
	 * @param value
	 * @return
	 */
	public static <K,V> Map<K,ArrayList<V>> smartPut(Map<K,ArrayList<V>> map, K key, V value) {
		if (map.containsKey(key)) {
			ArrayList<V> original = map.get(key);
			original.add(value);
			map.put(key, original);
		} else {
			ArrayList<V> newcollection = new ArrayList<V>();
			newcollection.add(value);
			map.put(key, newcollection);
		}
		return map;
	}

}
